*** How worried should we be? The implications of fabricated survey data for political science
*** Figure A11. Comparing Regression Results in Peru
*** Requires the user-written packages coefplot, grc1leg2, cem, and estout (for eststo) to be installed

set more off

* point Stata at the folder that holds the dataset (edit the path on the next line)
cd "C:\~\Downloads\"

use "QAC_2017_12.21.17.dta", clear

* restrict the sample to pais == 11 (presumably Peru, per the file header -- confirm)
drop if pais != 11

* clean_data: 1 = interview not cancelled; 0 = cancelled, ls3 coded 999999,
* or flagged as fraud; missing when cancelled is neither 0 nor 1 and no
* other rule applies
gen clean_data = (cancelled == 0) if inlist(cancelled, 0, 1)
replace clean_data = 0 if ls3 == 999999 | fraud == 1

* interviews with ls3 == 999999 are not counted as fraud from here on
* (this runs after clean_data has already used the original fraud flag)
replace fraud = 0 if ls3 == 999999

** age in years and age cohorts
* NOTE(review): q2 is copied into birth_yr, but the value is then used
* directly as an age in years (age_yrs) below -- confirm that q2 holds age
* rather than year of birth.
gen birth_yr = q2
replace birth_yr = . if inlist(birth_yr, 888888, 988888, 999999)

* interview date (daily date) and its calendar year, parsed from the string variable date
gen svy_date = date(date, "MDYhm")
format svy_date %td
gen year = year(svy_date)

* upload timestamp and the hour of day at which the interview was uploaded
gen upload_time = Clock(upload, "MDYhm")
format upload_time %tC
gen upload_hour = hhC(upload_time)

* six age cohorts; respondents under 18 or with missing age stay missing
gen age_yrs = birth_yr
gen edad = 1 if age_yrs >= 18 & age_yrs <=25
replace edad = 2 if age_yrs >= 26 & age_yrs <=35
replace edad = 3 if age_yrs >= 36 & age_yrs <=45
replace edad = 4 if age_yrs >= 46 & age_yrs <=55
replace edad = 5 if age_yrs >= 56 & age_yrs <=65
* Stata treats missing as larger than any number, so the open-ended top
* cohort must exclude missing ages explicitly
replace edad = 6 if age_yrs >= 66 & !missing(age_yrs)
label var edad "Age Cohort"
label define agegroup 1 "18-25 years old" 2 "26-35 years old" 3 "36-45 years old" 4 "46-55 years old" 5 "56-65 years old" 6 "66+ years old" 
label values edad agegroup

* education level: three tiers derived from the years-of-education item ed
* (0-6 -> 1, 7-11 -> 2, 12-18 -> 3, anything else -> missing)
gen ed_level = cond(inlist(ed, 0, 1, 2, 3, 4, 5, 6), 1, ///
               cond(inlist(ed, 7, 8, 9, 10, 11), 2, ///
               cond(inlist(ed, 12, 13, 14, 15, 16, 17, 18), 3, .)))
label define edlevel 1 "None or Primary Ed." 2 "Secondary Ed." 3 "University+ Ed."
label values ed_level edlevel
label variable ed_level "Education Level"

* years of education: non-response codes become missing (done after ed_level is built)
replace ed = . if inlist(ed, 888888, 988888, 999999)
label variable ed "Years of Education"

** matching
* fix the seed before cem so the matching result is reproducible across runs
set seed 339487731
* first flag cases that were canceled but not labeled as fraudulent, then drop them
gen canceled_nonfraud = 1 if status == "Canceled" & fraud == 0
replace canceled_nonfraud = 0 if canceled_nonfraud == .
drop if canceled_nonfraud == 1

* match fraudulent (treated) to non-fraudulent interviews on upm1a, q1 and
* agequota, each with the (#0) cutpoint specification; k2k requests equal
* numbers of treated and control cases
cem upm1a (#0) q1 (#0) agequota (#0) if canceled_nonfraud == 0, tr(fraud) k2k

** label data for full sample comparisons
* first drop non-clean cases (clean_data == 0) that cem did not match
drop if clean_data == 0 & cem_matched != 1
* 1 = fake, 2 = clean matched, 3 = rest of data
gen comparison_groups = 1 if fraud == 1 & cem_matched == 1
replace comparison_groups = 2 if fraud == 0 & cem_matched == 1
replace comparison_groups = 3 if fraud == 0 & cem_matched == 0

* double the "rest of the dataset" (non-matched clean interviews)
* the full variable name comparison_groups is used throughout so the script
* does not depend on variable abbreviation being enabled
gen exp=1
replace exp=2 if comparison_groups == 3
expand exp, gen(copy)

* compromised versus clean indicator (1 = clean, 2 = compromised)
* originals (copy == 0) start as clean; the second line then moves the fake
* interviews and the duplicated rest-of-data rows to the compromised side
gen clean_or_compr = .
replace clean_or_compr = 1 if comparison_groups == 2 | copy == 0
replace clean_or_compr = 2 if comparison_groups == 1 | copy == 1

* 0/1 version of the same indicator, with value labels
gen clean = 1 if clean_or_compr == 1
replace clean = 0 if clean_or_compr == 2
lab define clean_comp_lab 0 "Compromised" 1 "Clean"
lab values clean clean_comp_lab


* recode missing values
* Each base item below has two variants (suffix 1 and suffix 2). When one
* variant is coded 999999, the base variable is overwritten with the value of
* the other variant.
* NOTE(review): 999999 presumably marks "not asked / inapplicable" for the
* variant a respondent did not receive -- confirm against the codebook.
* NOTE(review): the pais != 22 and pais != 7 guards are always true if the
* data have already been restricted to a single other country (this script
* keeps pais == 11 earlier); they look like leftovers from a multi-country
* version of the code.
replace dst1b = dst1b1 if dst1b2 == 999999 & pais != 22
replace dst1b = dst1b2 if dst1b1 == 999999 & pais != 22
replace env2b = env2b1 if env2b2 == 999999 & pais != 22
replace env2b = env2b2 if env2b1 == 999999 & pais != 22
replace drk1 = drk11 if drk12 == 999999 & pais != 22
replace drk1 = drk12 if drk11 == 999999 & pais != 22
replace env1c = env1c1 if env1c2 == 999999 & pais != 22
replace env1c = env1c2 if env1c1 == 999999 & pais != 22
replace mil10un = mil10un1 if mil10un2 == 999999 & pais != 22
replace mil10un = mil10un2 if mil10un1 == 999999 & pais != 22
replace mil10a = mil10a1 if mil10a2 == 999999 & pais != 22
replace mil10a = mil10a2 if mil10a1 == 999999 & pais != 22
replace mil10e = mil10e1 if mil10e2 == 999999 & pais != 22
replace mil10e = mil10e2 if mil10e1 == 999999 & pais != 22
* NOTE(review): this pair is guarded by pais != 7 rather than pais != 22 -- confirm intended
replace mil10oas = mil10oas1 if mil10oas2 == 999999 & pais != 7
replace mil10oas = mil10oas2 if mil10oas1 == 999999 & pais != 7
* NOTE(review): mil10oa must exist as its own variable; as an abbreviation it
* would be ambiguous between mil10oas, mil10oas1 and mil10oas2 -- verify
replace mil10oas = mil10oa if pais == 22



* presidential approval (m1 reversed: 1<->5, 2<->4; non-response codes set to missing)
recode m1 (1=5)(2=4)(3=3)(4=2)(5=1)(888888 988888 999999=.), gen(pres_approval)
label var pres_approval "Presidential Approval"
* turnout in 2016
* 1 if vb2 == 1, 0 for any other vb2 value; left missing when vb1 == 2
gen turnout_2016 = 1 if vb2 == 1 & vb1 != 2
replace turnout_2016 = 0 if vb2 != 1 & vb1 != 2
label var turnout_2016 "Turnout in 2016"
* vote for kuczynski in 2016
* The same candidate code must be used for the 1s and the 0s; with different
* codes on the two lines the second line overwrites every 1 with 0 and the
* indicator never equals 1.
* NOTE(review): 1101 is assumed to be Kuczynski's code in vb3n (pais 11
* codes carry the 11 prefix) -- confirm against the questionnaire.
gen kuczynski_2016 = 1 if vb3n == 1101
replace kuczynski_2016 = 0 if vb3n != 1101
replace kuczynski_2016 = . if vb1 == 2
label var kuczynski_2016 "Vote for Kuczynski 2016"
* nonvoter in 2016 (mirror image of turnout_2016; missing where turnout is missing)
gen nonvoter_2016 = 1 if  turnout_2016 == 0
replace nonvoter_2016 = 0 if  turnout_2016 == 1
label var nonvoter_2016 "Non-Voter in 2016"
* the three non-response codes that are set to missing throughout this section
local nr 888888 988888 999999

* neighborhood insecurity
recode aoj11 (`nr' = .), gen(local_insecurity)
label variable local_insecurity "Neighborhood Insecurity"

* crime victimization (2 becomes 0)
recode vic1ext (2 = 0) (`nr' = .), gen(crime_victim)
label variable crime_victim "Crime Victimization"

* national and personal economic evaluations (scale reversed: 1 <-> 3)
recode soct2 (1 = 3) (2 = 2) (3 = 1) (`nr' = .), gen(national_econ)
label variable national_econ "National Economy Evaluation"
recode idio2 (1 = 3) (2 = 2) (3 = 1) (`nr' = .), gen(personal_econ)
label variable personal_econ "Personal Economy Evaluation"

* trust in the national police
recode b18 (`nr' = .), gen(trust_police)
label variable trust_police "Trust in National Police"

* left-right self placement
recode l1 (`nr' = .), gen(left_right)
label variable left_right "Left-Right Self Placement"

* ideology dummies built from l1: 1 when l1 falls in the listed values,
* 0 for every other value, missing only when l1 == 999999
gen left = inlist(l1, 1, 2, 3) if l1 != 999999
label variable left "Left"
gen center = inlist(l1, 4, 5, 6, 7) if l1 != 999999
label variable center "Center"
gen right = inlist(l1, 8, 9, 10) if l1 != 999999
label variable right "Right"
gen non_ideo = inlist(l1, 888888, 988888) if l1 != 999999
label variable non_ideo "DK/NA Ideology"

* respondent and interviewer skin tone (999999 set to missing)
gen skintone = colorr if colorr != 999999
label variable skintone "Skin Tone"
gen skintone_int = colori if colori != 999999
label variable skintone_int "Interviewer Skin Tone"

* support for democracy
gen dem_support = ing4 if !inlist(ing4, 888888, 988888, 999999)
label variable dem_support "Support for Democracy"

* respect for political institutions (b2 is cleaned in place)
replace b2 = . if inlist(b2, 888888, 988888, 999999)
label variable b2 "Respect for Pol. Institutions"

* political tolerance (peaceful demonstration)
recode d2 (`nr' = .), gen(pol_tolerance)
label variable pol_tolerance "Political Tolerance"

* external efficacy: eff1 reversed (1 <-> 7, ...), then rescaled to 0-1
recode eff1 (1 = 7) (2 = 6) (3 = 5) (4 = 4) (5 = 3) (6 = 2) (7 = 1) (`nr' = .), gen(ext_eff)
replace ext_eff = (ext_eff - 1) / 6
label variable ext_eff "External Efficacy"

* internal efficacy: eff2 rescaled to 0-1
recode eff2 (`nr' = .), gen(int_eff)
replace int_eff = (int_eff - 1) / 6
label variable int_eff "Internal Efficacy"

* overall efficacy: mean of the two 0-1 efficacy scores
gen efficacy = (ext_eff + int_eff) / 2
label variable efficacy "Political Efficacy"

* recode everything 0-1: each variable is shifted down by 1 and divided by
* the matching entry in `spans' (same position in the two lists)
* NOTE(review): skintone is divided by 9, which reaches 1 only if colorr
* tops out at 10 -- confirm the range of colorr.
local items b2 local_insecurity trust_police national_econ personal_econ skintone pres_approval
local spans 6  3                6            2             2             9        4
local n_items : word count `items'
forvalues i = 1/`n_items' {
    local v : word `i' of `items'
    local k : word `i' of `spans'
    replace `v' = (`v' - 1) / `k'
}

* age cohort and education tier on a 0-1 scale, plus a woman dummy from q1
gen age = (edad - 1) / 5
label variable age "Age"
gen educ = (ed_level - 1) / 2
label variable educ "Level of Education"
recode q1 (2 = 1) (1 = 0), gen(mujer)
label variable mujer "Woman"

* pieces of the coefplot call that are shared by both panels
local clean_spec clean, label(Clean Data) ciopts(lcolor(black)) mcolor(black)
local compr_spec compromised, label(Compromised Data) ciopts(lcolor(gs10)) mcolor(gs10) msymbol(Th)
local shared_opts drop(_cons) xline(0) title("") msymbol(circle) scheme(plotplain) graphregion(color(white)) legend(pos(6) row(1))

** support for democracy
* same specification estimated on the clean (clean_or_compr == 1) and the
* compromised (clean_or_compr == 2) samples, stored under those names
eststo clear
local dem_rhs b2 pres_approval kuczynski_2016 nonvoter_2016 center right non_ideo local_insecurity crime_victim trust_police national_econ personal_econ mujer age educ skintone
local g = 0
foreach sample in clean compromised {
    local ++g
    reg dem_support `dem_rhs' if clean_or_compr == `g'
    estimates store `sample'
}
coefplot (`clean_spec') (`compr_spec'), ///
`shared_opts' saving(full_model_d.gph, replace) title("Support for Democracy")

** political tolerance
eststo clear
local tol_rhs efficacy educ mujer age
local g = 0
foreach sample in clean compromised {
    local ++g
    reg pol_tolerance `tol_rhs' if clean_or_compr == `g'
    estimates store `sample'
}
coefplot (`clean_spec') (`compr_spec'), ///
`shared_opts' saving(full_model_t.gph, replace) title("Political Tolerance")

* combined figure: the two saved panels side by side
grc1leg2 full_model_d.gph full_model_t.gph, row(1) scheme(plotplain)
